* Session setup: no output paging, -pause- enabled, empty data in memory
set more off
pause on
clear

* Restart the text log (close any log left open by an earlier run first)
capture log close
log using prep-election-regs.log, text replace

******************************************************************************************
* Name: 	prep-election-regs.do
* Purpose: 	this do file uses the merged election results and preps for analysis
* input:	ca-election-reclink-total.dta
******************************************************************************************

* Start from the record-linked election results
use ca-election-reclink-total.dta, clear

************************
* Classify occupations
************************

* Occupation coding lives in a helper do-file; its result is checkpointed
* to occ-temp.dta (erased at the very end of this do-file)
do 04a-classify-occ.do

save occ-temp, replace

************************
* Classify offices
************************

use occ-temp, clear

* Detailed office categories (codes 1-9), defined in two steps
label define officecat 1 "County Supervisor" 2 "Mayor" 3 "City Council" 4 "Other city position" 5 "School Board/Superintendent" 6 "DA/Judge/Defender"
label define officecat 7 "Rent control/Community Planning" 8 "Law enforcement" 9 "Advisory council", add

* Coarse office categories (codes 1-6)
label define officecat2 1 "County Supervisor" 2 "Mayor" 3 "City Council" 4 "Other city position" 5 "School Board/Superintendent" 6 "Other"

* The office classifier reads the global macro "year"; run it once per election year
forvalues yr = 1995/2014 {
	global year `yr'
	do 04b-classify-office.do
}

* Candidate identifiers first
order LAST FIRST id

******************
* Assign Gender  
******************

** Census names file **

* Working copy of the first name; this is the key used for the gender look-ups
generate name = FIRST

* Strip commas
replace name = subinstr(name, ",", "", .)

* Hand corrections to some first names.
* Each misspelt or compound name is mapped to the standard spelling to be looked up.
* No corrected value is itself a misspelling in this list, so order does not matter.
replace name = "BERNADETTE" if name == "BERANDETTE"
replace name = "ABRAHAM"    if name == "IBRAHEEM"
replace name = "ALBERTO"    if name == "WUALBERTO"
replace name = "ANN"        if inlist(name, "ANN-MARIE", "NELLIEANN")
replace name = "ANNA"       if inlist(name, "MARGARET-ANNE", "MARTHAANE")
replace name = "ANTHONY"    if inlist(name, "MARCANTHONY", "MARK-ANTHONY")
replace name = "ARTURO"     if name == "ARTUTRO"
replace name = "BARBARA"    if name == "BAARBARA"
replace name = "BENJAMIN"   if name == "BENJERMAN"
replace name = "CAROL"      if name == "JO-CAROL"
replace name = "CHARLES"    if name == "DANCHARLES"
replace name = "CHARLOTTE"  if name == "CHARLOTEE"
replace name = "CHRISTINA"  if name == "CHRISTIMA"
replace name = "CONRAD"     if name == "CONRADE"
replace name = "DEBORAH"    if name == "DEBRRAH"
replace name = "DEBRA"      if name == "DEBBIEDEBRA"
replace name = "DESTINY"    if name == "DESTINIE"
replace name = "ELEANOR"    if name == "ELEANDOR"
replace name = "ELLEN"      if name == "ELLLEN"
replace name = "FERNANDO"   if inlist(name, "FERNANDIO", "FRENANDO")
replace name = "FRANCIS"    if name == "FRANSIS"
replace name = "GABRIELLE"  if name == "GARBRIELLE"
replace name = "GORDON"     if name == "G GORDON"
replace name = "HELENA"     if name == "HELIENA"
replace name = "HUSSEIN"    if name == "HOSSEIN"
replace name = "JAMES"      if name == "JAMES-JIM"
replace name = "JOANN"      if name == "JO-ANN"
replace name = "JOANNE"     if name == "JONANNE"
replace name = "JOHN"       if inlist(name, "JOHN-DAVID", "JOHN-MARC")
replace name = "JON"        if name == "JON-MICHAEL"
replace name = "JONATHAN"   if name == "JOANATHAN"
replace name = "JUAN"       if name == "JUAN-CARLOS"
replace name = "JULIANNE"   if name == "JULLIANNE"
replace name = "KATIE"      if name == "CATIE"
replace name = "LEONARD"    if name == "LEONDARD"
replace name = "LINDSAY"    if name == "LINDAY"
replace name = "LIZETTE"    if name == "LIZETT"
replace name = "LORENZO"    if name == "LORENZIO"
replace name = "LORRAINE"   if name == "LORRANIE"
replace name = "LUCY"       if name == "LOUCY"
replace name = "MARGARITA"  if name == "MARGHARITA"
replace name = "MARILYN"    if name == "MARIILYN"
replace name = "MARLENE"    if name == "MARLEINE"
replace name = "MARTIN"     if name == "VAN-MARTIN"
replace name = "MIKE"       if name == "K A MIKE"
replace name = "PAUL"       if name == "PAUL-DEAN"
replace name = "RAYMOND"    if name == "WRAYMOND"
replace name = "ROSEANNA"   if name == "ROSAENA"
replace name = "ROXANNA"    if name == "ROXSANA"
replace name = "SCOTT"      if name == "KSCOTT"
replace name = "STEPHANIE"  if name == "SEPHANIE"
replace name = "THOMAS"     if name == "CATHOMAS"
replace name = "VLADIMIR"   if name == "VALDIMIR"

** Census **

* Look up each cleaned first name in the Census name file.
* -merge m:1- (the form already used for the city population merge in this
* file) replaces the legacy -merge varlist using- syntax: it needs no
* pre-sorting of either dataset, and it stops with an error if the look-up
* file contains duplicate names instead of silently pairing rows.
merge m:1 name using census-names-sex.dta
tab _merge
* Names that only exist in the look-up file are not candidates
drop if _merge==2
drop _merge

tab sex_final, m

** SSA **
* For names that did not merge, use the SSA baby names from 1940-1970

merge m:1 name using ssa-names-sex
tab _merge
drop if _merge==2
* _merge from this second merge is deliberately left in the data here;
* it is dropped further down in this do-file.

* Names without any gender assignment
count
count if sex_final==. & sex_ssa==.

rename sex_final sex_census
tab sex_census sex_ssa, m

* Generate sex variable based on the classifications:
* start from the Census value and fall back on SSA when the Census value is
* missing or 999 and SSA has a value
gen sex = sex_census
replace sex = sex_ssa if (sex_census==999 | sex_census==.) & sex_ssa!=.

* There are names where there is a true disagreement (SSA and Census disagree on gender); make these "cannot be classified" (code 999)
list name if (sex_census==0 & sex_ssa==1) | (sex_census==1 & sex_ssa==0)
replace sex = 999 if (sex_census==0 & sex_ssa==1) | (sex_census==1 & sex_ssa==0)

tab sex, m

**********************************
* Gender of opponent 
* (only marginal candidates)
**********************************

* For every election year, build opponent_sex<year>: the sex of the candidate on
* the other side of the win/lose cutoff, for the candidates sitting at that cutoff.
* The cutoff position is vote<year> (first_vote, copied from vote<year> later in
* this file, is labelled "first election number elected").
* NOTE(review): sex may still hold the 999 "cannot be classified" code at this
* point, so opponent_sex<year> can be 999 too.
forval x=1995/2014 {
	
	* Base case, no ties. Rows are sorted by rank, then LAST, within raceid.
	*   - the row whose rank equals vote takes the sex of the next row
	*   - the row whose rank equals vote+1 takes the sex of the previous row
	* NOTE(review): these two lines group by raceid only, whereas every tie rule
	* below groups by raceid AND term; confirm the difference is intended.
	* NOTE(review): Stata treats missing==missing as true, so rows where rank and
	* vote are both missing also satisfy rank==vote; presumably those rows are
	* never used for that year, but confirm.
	bys raceid`x' (rank`x' LAST): gen opponent_sex`x' = sex[_n+1] if rank`x'==vote`x' 
	bys raceid`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-1] if rank`x'==(vote`x'+1) 
	
	* deal with ties
	* tag = number of OTHER observations sharing the same raceid and rank
	* (0 = no tie, 1 = two-way tie, 2 = three-way tie)
	duplicates tag raceid`x' rank`x', g(tag)
	
	* In all rules below _n is the row position inside the raceid-term group
	* after sorting by rank and LAST; the offset points from each tied row to the
	* nearest row on the other side of the cutoff.
	
	* single winner, tie for runner up 
	* (rows 2 and 3 are tied at rank 2; both take the sex of row 1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-1] if rank`x'==2  & vote`x'==1 & tag==1 & _n==2
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-2] if rank`x'==2  & vote`x'==1 & tag==1 & _n==3
	
	* multi winner, twoway tie for last winner
	* (rows vote-1 and vote are tied; both take the sex of row vote+1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n+2] if rank`x'==(vote`x'-1) & tag==1 & _n==(vote`x'-1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n+1] if rank`x'==(vote`x'-1) & tag==1 & _n==vote`x'

	* multi winner, threeway tie for last winner
	* (rows vote-2, vote-1 and vote are tied; all take the sex of row vote+1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n+3] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x'-2)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n+2] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x'-1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n+1] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x')

	* multi winner, twoway tie for runner up
	* (rows vote+1 and vote+2 are tied; both take the sex of row vote)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-1] if rank`x'==(vote`x'+1) & tag==1 & _n==(vote`x'+1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-2] if rank`x'==(vote`x'+1) & tag==1 & _n==(vote`x'+2)

	* multi winner, threeway tie for runner up
	* (rows vote+1, vote+2 and vote+3 are tied; all take the sex of row vote)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-1] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+1)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-2] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+2)
	bys raceid`x' term`x' (rank`x' LAST): replace opponent_sex`x' = sex[_n-3] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+3)
	
	* tag is year-specific scratch; remove it so the next iteration can recreate it
	drop tag
}
 
************************************************
* Define first election and outcome variable 
************************************************

* Participation within 4 years
* Create the outcome variables empty (missing or ""); the year loops that follow fill them in.
* Creation order is kept as-is so the column order of the data does not change.
foreach stub in election voteshare elected vote num_cand margin rank opponent_sex totalvotes {
	generate first_`stub' = .
}
generate double first_raceid = .
generate first_occ = ""
generate first_occ_group = .
foreach stub in incumbent office {
	generate first_`stub' = ""
}
foreach newvar in first_officecat first_officecat2 run_again win win_any margin_next {
	generate `newvar' = .
}
foreach newvar in office_next place_next first_place {
	generate `newvar' = ""
}
generate second_election = .

* Move the participation indicators to the front in the sequence
* year1995 year_runagain1995 year1996 ... year_runagain2014, so that later code
* can address a span of years as one contiguous variable range
local yearcols
forvalues yr = 1995/2014 {
	local yearcols `yearcols' year`yr' year_runagain`yr'
}
order `yearcols'

* For each possible first-election year (1995-2010): flag candidates whose first
* observed election is that year, count their runs in the following four years,
* record the outcome of the next election and of any election in that window,
* and copy the first-election characteristics into the first_* variables.
forvalues yr = 1995/2010 {

	* Determine first election observed:
	* the running total of all year / run-again indicators from 1995 up to this
	* year is 1, or it is 2 and the candidate also has the run-again flag this year
	tempvar sofar
	egen `sofar' = rowtotal(year1995-year_runagain`yr')
	replace first_election = `yr' if year`yr'==1 & first_election==. & (`sofar'==1 | (`sofar'==2 & year_runagain`yr'==1))
	drop `sofar'

	* Determine participation within four years of first election
	local from = `yr' + 1
	local to   = `yr' + 4

	disp "`from'" "  `to'"

	tempvar later
	egen `later' = rowtotal(year`from'-year_runagain`to')
	replace run_again = `later' if first_election==`yr'

	* For winning, only take next election participated in (not any election in the next four years)
	forvalues nxt = `from'/`to' {

		* Year of the next election with a recorded result (elected coded 1 or 2);
		* must be set before win, because it keys on win still being missing
		replace second_election = `nxt' if first_election==`yr' & inlist(elected`nxt', 1, 2) & missing(win)

		* win: 1 if elected is 1, 0 if elected is 2, first such year only
		replace win = (elected`nxt'==1) if first_election==`yr' & inlist(elected`nxt', 1, 2) & missing(win)

		* First non-missing margin / non-empty office and place in the window
		replace margin_next = margin`nxt' if first_election==`yr' & missing(margin_next)
		replace office_next = office`nxt' if first_election==`yr' & office_next==""
		replace place_next  = place`nxt'  if first_election==`yr' & place_next==""

		* A run-again result in this year implies a result was already recorded
		assert win!=. if first_election==`yr' & (elected`nxt'_runagain==1 | elected`nxt'_runagain==2)

		* Second win variable with any win in next four years:
		* a win (1) overrides an earlier loss, a loss (2) only fills a missing value;
		* the regular result is applied first, then the run-again result
		foreach result in elected`nxt' elected`nxt'_runagain {
			replace win_any = 1 if first_election==`yr' & `result'==1 & (missing(win_any) | win_any==0)
			replace win_any = 0 if first_election==`yr' & `result'==2 & missing(win_any)
		}
	}

	drop `later'

	* Copy this year's characteristics into the first_* variables
	foreach stub in voteshare elected occ_group occ vote raceid incumbent num_cand office officecat officecat2 margin rank opponent_sex place totalvotes {
		replace first_`stub' = `stub'`yr' if first_election==`yr'
	}
}

* Participation within 8 years
* Same outcomes as the four-year block, over an eight-year window; only first
* elections in 1995-2006 are processed, since the window would otherwise run past 2014
foreach newvar in run8 win8 win8_any margin8 {
	generate `newvar' = .
}
foreach newvar in office8 place8 {
	generate `newvar' = ""
}

forvalues yr = 1995/2006 {

	* Determine participation within 8 years of first election
	local from = `yr' + 1
	local to   = `yr' + 8

	disp "`from'" "  `to'"

	tempvar later
	egen `later' = rowtotal(year`from'-year_runagain`to')
	replace run8 = `later' if first_election==`yr'

	* For winning, only take next election participated in (not any election in the next 8 years)
	forvalues nxt = `from'/`to' {

		* win8: 1 if elected is 1, 0 if elected is 2, first such year only
		replace win8 = (elected`nxt'==1) if first_election==`yr' & inlist(elected`nxt', 1, 2) & missing(win8)

		* First non-missing margin / non-empty office and place in the window
		replace margin8 = margin`nxt' if first_election==`yr' & missing(margin8)
		replace office8 = office`nxt' if first_election==`yr' & office8==""
		replace place8  = place`nxt'  if first_election==`yr' & place8==""

		* Second win variable with any win in next 8 years:
		* a win overrides an earlier loss, a loss only fills a missing value;
		* the regular result is applied first, then the run-again result
		foreach result in elected`nxt' elected`nxt'_runagain {
			replace win8_any = 1 if first_election==`yr' & `result'==1 & (missing(win8_any) | win8_any==0)
			replace win8_any = 0 if first_election==`yr' & `result'==2 & missing(win8_any)
		}
	}

	drop `later'
}

* Track what happens after the second election (the "third run" in the variable labels):
* keyed on second_election, which is only set for candidates who ran again
* within four years of their first election
foreach newvar in second_run4 second_win4 second_margin4 {
	generate `newvar' = .
}
foreach newvar in second_office4 second_place4 {
	generate `newvar' = ""
}

forvalues yr = 1996/2010 {

	* Determine participation within 4 years of second election
	local from = `yr' + 1
	local to   = `yr' + 4

	disp "`from'" "  `to'"

	tempvar later
	egen `later' = rowtotal(year`from'-year_runagain`to')
	replace second_run4 = `later' if second_election==`yr'

	* For winning, only take next election participated in (not any election in the next 4 years)
	forvalues nxt = `from'/`to' {

		* second_win4: 1 if elected is 1, 0 if elected is 2, first such year only
		replace second_win4 = (elected`nxt'==1) if second_election==`yr' & inlist(elected`nxt', 1, 2) & missing(second_win4)

		* First non-missing margin / non-empty office and place in the window
		replace second_margin4 = margin`nxt' if second_election==`yr' & missing(second_margin4)
		replace second_office4 = office`nxt' if second_election==`yr' & second_office4==""
		replace second_place4  = place`nxt'  if second_election==`yr' & second_place4==""
	}

	drop `later'
}


* Classify offices
* Re-run the office classifier with the global "year" set to the suffixes
* "_next" and "8" (the officecat_next / officecat8 family of variables is
* labelled near the end of this do-file)
foreach suffix in "_next" "8" {
	global year "`suffix'"
	do 04b-classify-office.do
}

* first_election is missing for some candidates; check how often candidates
* without a first election ran in 2010-2014 (first elections after 2010 are not assigned)
tab first_election, m
egen y = rowtotal(year2010-year_runagain2014)
tab y if first_election==., m
drop y

* Run again
tab run_again if first_election!=., m

* Wins
* Candidates who ran again but have no win/loss recorded are written out for inspection
tab win if first_election!=. & run_again!=0, m
outsheet using nowin-data.csv if first_election!=. & run_again!=0 & win==., comma replace

* Incumbent during first election observed: recode Y/N to 1/0 and make numeric
replace first_incumbent = "1" if first_incumbent == "Y"
replace first_incumbent = "0" if first_incumbent == "N"
destring first_incumbent, replace

* In office during first election observed:
* 1 if incumbent or occupation group 31, 0 if explicitly neither, missing otherwise
generate first_inoffice = 1 if first_incumbent==1 | first_occ_group==31
replace first_inoffice = 0 if first_incumbent==0 & first_occ_group!=31

* Won first election observed: code 2 becomes 0, code 3 becomes missing
recode first_elected (2 = 0) (3 = .)

* typo with the raceid or the runoffs; or number of candidates or elected
drop if inlist(first_raceid, 199600077, 199801352, 199600994, 199601356)
drop if raceid1996==199601286

* one obs without election outcome, lost
replace first_elected = 0 if first_elected==. & first_margin!=. & first_raceid==199600024

* Consistency checks: winners have a positive margin, losers a negative one,
* and every observation with a margin has an outcome
assert first_margin>0 if first_elected==1
assert first_margin<0 if first_elected==0 & first_margin!=.
assert first_elected!=. if first_margin!=.

* Run at all variable: 1 if at least one later run in the window, 0 if none,
* missing when the run count itself is missing
generate run_atall = 1 if run_again > 0 & run_again < .
replace run_atall = 0 if run_again == 0
assert run_atall==. if first_election>2010

generate run8_atall = 1 if run8 > 0 & run8 < .
replace run8_atall = 0 if run8 == 0
assert run8_atall==. if first_election>2006

* Win unconditional on running: if did not run again, set to zero
generate win_uncond = win
replace win_uncond = 0 if run_atall == 0

generate win8_uncond = win8
replace win8_uncond = 0 if run8_atall == 0

* Margin in next election: if did not run again, set to zero
replace margin_next = 0 if run_atall == 0

* Female: copy of sex restricted to the two classified codes (0 and 1);
* any other value, including 999, is left missing
generate female = sex if inlist(sex, 0, 1)

* Descriptive checks
tab first_elected, m
tab first_elected sex, m
tab run_again, m
tab run_again sex, m

* Categorize offices: working copies of the first-election office categories
foreach cat in officecat officecat2 {
	generate `cat' = first_`cat'
}

* Attach the value label of the same name, then tabulate the categories and
* list the office titles that did not receive one
foreach cat in officecat officecat2 {
	label values `cat' `cat'

	tab `cat' if first_election!=., m
	tab first_office if `cat'==.
}

* Merge indicator left over from the SSA names merge
drop _merge

********************************
* Merge in city population data
********************************

* City name used as the merge key, starting from the first-election place
generate cityname = first_place

* clean up city names for city council elections
* Fragments are removed in this exact order: the longer "-RECALL" and "-PROPOSED"
* must be stripped before the shorter "-R" and "-P"
foreach junk in "CITY" "-RECALL" "-R" "(PROPOSED)" "-PROPOSED" "-AT LARGE" "-P" {
	replace cityname = subinstr(cityname, "`junk'", "", .) if officecat==3
}
replace cityname = trim(cityname)

* Hand corrections of misspelt or non-standard city names
replace cityname = "CARPINTERIA"                if cityname == "CARPENTERIA"
replace cityname = "CRESCENT"                   if cityname == "CRESENT"
replace cityname = "EAST PALO ALTO"             if cityname == "EAST PAOLO ALTO"
replace cityname = "FONTANA"                    if cityname == "FONTANTA"
replace cityname = "HAWAIIAN GARDENS"           if cityname == "HAWIIAN GARDENS"
replace cityname = "LAGUNA NIGUEL"              if cityname == "LAGUNA NIGEL"
replace cityname = "LAGUNA WOODS"               if cityname == "LAGUNA WOODS/LAGUNA VERDE"
replace cityname = "LOMPOC"                     if cityname == "LOMPC"
replace cityname = "MAMMOTH LAKES"              if cityname == "MAMMOTH"
replace cityname = "MILPITAS"                   if cityname == "MILIPITAS"
replace cityname = "MONTAGUE"                   if cityname == "MONTEGUE"
replace cityname = "MONTEREY"                   if cityname == "MONTERERY"
replace cityname = "MORRO BAY"                  if cityname == "MORROW BAY"
replace cityname = "MOUNT SHASTA"               if cityname == "MT. SHASTA"
replace cityname = "MURRIETA"                   if cityname == "MURIETTA"
replace cityname = "ANGELS"                     if cityname == "OF ANGELS"
replace cityname = "COMMERCE"                   if cityname == "OF COMMERCE"
replace cityname = "ORANGE COVE"                if cityname == "ORANGE GROVE"
replace cityname = "PASO ROBLES"                if cityname == "PASA ROBLES"
replace cityname = "POINT ARENA"                if cityname == "PT. ARENA"
replace cityname = "RANCHO SANTA MARGARITA"     if cityname == "RANCH SANTA MARGARITA"
replace cityname = "RANCHO PALOS VERDES"        if cityname == "RANCHO PALOS VERDE"
replace cityname = "ROCKLIN"                    if cityname == "ROCKIN"
replace cityname = "SAN BUENAVENTURA (VENTURA)" if inlist(cityname, "SAN BUENA VENTURA", "SAN BUENAVENTURA")
replace cityname = "SARATOGA"                   if cityname == "SATATOGA"
replace cityname = "SEBASTOPOL"                 if cityname == "SEBASTOPOOL"
replace cityname = "TEHACHAPI"                  if cityname == "TECHACHAPI"
replace cityname = "WILLOWS"                    if cityname == "WILLLOWS"
replace cityname = "CARMEL VALLEY VILLAGE"      if cityname == "CARMEL VALLEY"
replace cityname = "EL DORADO HILLS"            if cityname == "EL DORADO"
replace cityname = "INGLEWOOD"                  if cityname == "INGELWOOD"
replace cityname = "SANTA BARBARA"              if cityname == "SANTA BARABARA"
replace cityname = "APPLE VALLEY"               if cityname == "TOWN OF APPLE VALLEY"
replace cityname = "YUCCA VALLEY"               if cityname == "TOWN OF YUCCA VALLEY"

replace cityname = trim(cityname)

* Attach city population data; cities that appear only in the population file are discarded
merge m:1 cityname using citypop.dta
tab _merge
drop if _merge==2

* city council elections without a match in the city population file
tab first_place if _merge==1 & officecat==3, m
drop _merge

*********************************
* Compute female representation
*********************************

* Build female-rep.dta: share and count of women among elected candidates,
* by county and by county x office category (v2), from the 1995-1997 elections.
* The main data are set aside with -preserve- and brought back at the end.
preserve

* Only candidates with a classified sex
drop if missing(female)

keep id CNTYNAME* place* female officecat* elected* year*

drop CNTYNAME officecat officecat2 *runagain

* Keep a candidate only if, in every year 1995-2014, they either have no
* entry (year indicator missing) or ran and were elected.
* NOTE(review): this removes anyone who lost any election in any year, not just
* in 1995-1997; confirm that this is the intended sample.
forvalues yr = 1995/2014 {
	drop if !((elected`yr'==1 & year`yr'==1) | year`yr'==.)
}

* Must have run in at least one of the three base years
keep if year1995==1 | year1996==1 | year1997==1

* Retain only the base-year columns needed for the reshape
local keepers id female
foreach stub in CNTYNAME officecat2 elected year {
	forvalues yr = 1995/1997 {
		local keepers `keepers' `stub'`yr'
	}
}
keep `keepers'

* One row per candidate and year
reshape long CNTYNAME officecat2 elected, i(id female) j(year)

keep if elected==1

* County-level share and number of elected candidates with a classified sex
bysort CNTYNAME: egen mean_female_county = mean(female)
bysort CNTYNAME: egen count_female_county = count(female)

* Same, by county and office category
bysort CNTYNAME officecat2: egen mean_female_county_office = mean(female)
bysort CNTYNAME officecat2: egen count_female_county_office = count(female)

* Collapse to one row per county x office category
bysort CNTYNAME officecat2: keep if _n==1
keep CNTYNAME officecat2 mean* count*

* higher/lower than median
summarize mean_female_county_office, detail
generate high_female = mean_female_county_office > r(p50)

* within office, higher/lower than the median
generate high_female_office = .
forvalues cat = 1/6 {
	summarize mean_female_county_office if officecat2==`cat', detail
	replace high_female_office = mean_female_county_office > r(p50) if officecat2==`cat'
}

save female-rep, replace

restore

****************
* Clean place
****************

replace place_next=upper(place_next)

* Run the place classifier once for the first-election place and once for the
* next-election place. 04c-classify-place.do is invoked with the variable
* temporarily named "place" and a fresh empty "place_mod"; presumably it fills
* place_mod with a standardised place name (confirm against that do-file).
foreach src in first_place place_next {
	rename `src' place
	gen place_mod = ""
	do 04c-classify-place.do

	rename place `src'
	rename place_mod `src'_mod
}

replace first_place_mod = trim(first_place_mod)
replace place_next_mod = trim(place_next_mod)

* same = 1 if the next election is for the same office category (v2) in the same
* cleaned place, 0 if either differs, missing when either category is missing.
* The equality branch now also requires both categories to be non-missing,
* matching the guard on the 0 branch: previously two missing categories
* (missing==missing is true in Stata) with matching place strings, for example
* both empty, were coded as "same office".
gen same = 1 if officecat2==officecat2_next & first_place_mod==place_next_mod & !missing(officecat2, officecat2_next)
replace same = 0 if officecat2!=. & officecat2_next!=. & (officecat2!=officecat2_next | first_place_mod!=place_next_mod)

*********************************
* Clean up and label variables
*********************************

* Drop variables
* All year-specific columns (names containing 199, 200 or 201), plus the
* intermediate name-matching and place-cleaning helpers
drop *199* *200* *201* sex_census sex_ssa flag_ssa occ_clean NAME name ULAST UFIRST UCNTYNAME BALDESIG place_next_mod first_place_mod

* Label
label var FIRST "first name"
label var LAST "last name"
label var id "candidate id (user generated)"
label var CNTYNAME "county name"
label var sex "sex 0=male, 1=female"
* The value label "sex" is attached below but not defined anywhere in this
* do-file; define it here unless an input file already supplied one
* (-capture- leaves an existing definition untouched). 999 is the
* "cannot be classified" code assigned when the sex variable was built.
capture label define sex 0 "male" 1 "female" 999 "cannot be classified"
label val sex sex
label define first_elected 0 "not elected" 1 "elected"
label val first_elected first_elected
label var first_margin "Margin of victory/loss in first election"
label var run_atall "Probability ran in a subsequent election"
label var first_election "first election year"
label var first_voteshare "first election vote share"
label var first_elected "first election outcome"
label var first_vote "first election number elected"
label var first_num_cand "first election number candidates"
label var first_rank "first election rank"
label var first_opponent_sex "first election opponent sex"
label var first_totalvotes "first election total votes cast"
label var first_raceid "first election raceid"
label var first_occ "first election occupation"
label var first_occ_group "first election occupation group"
label var first_incumbent "first election incumbency status"
label var first_office "first election office"
label var first_officecat "first election office category v1"
label var first_officecat2 "first election office category v2"
label var run_again "# runs again within 4 years"
label var win "conditional on running again in 4, wins next election"
label var win_any "conditional on running again in 4, wins any election in 4 years"
label var margin_next "conditional on running again in 4, next election margin of victory"
label var office_next "conditional on running again in 4, next election office"
label var place_next "conditional on running again in 4, next election place"
label var second_election "conditional on running again in 4, next election year"
label var first_place "first election place"
* run8 is a count of runs, like run_again, so it gets the same "#" wording
label var run8 "# runs again within 8 years"
label var win8 "conditional on running again in 8, wins next election"
* win8_any covers the 8-year window (the label previously said "4 years")
label var win8_any "conditional on running again in 8, wins any election in 8 years"
label var margin8 "conditional on running again in 8, margin of victory"
label var office8 "conditional on running again in 8, next election office"
label var place8 "conditional on running again in 8, next election place"
label var second_run4 "conditional on winning first election and running again in 4, third run"
label var second_win4 "conditional on winning first election and running again in 4, election outcome for third run"
label var second_margin4 "conditional on winning first election and running again in 4, margin of victory for third run"
label var second_office4 "conditional on winning first election and running again in 4, office for third run"
label var second_place4 "conditional on winning first election and running again in 4, election place for third run"
label var officecat_next "conditional on running again in 4, next election office category v1"
label var officecat2_next "conditional on running again in 4, next election office category v2"
label var officecat8 "conditional on running again in 8, office category v1"
label var officecat28 "conditional on running again in 8, office category v2"
label var first_inoffice "in office during first observed election"
label var run8_atall "Probability ran in a subsequent election in 8 years"
label var win_uncond "Probability won an election in 4 years, unconditional on running again"
label var win8_uncond "Probability won an election in 8 years, unconditional on running again"
label var female "Female"
label var cityname "first election city name"
label var bigcity "first election in a big city"
label var officecat "first election office category v1"
label var officecat2 "first election office category v2"
label var same "conditional on running again, ran for same exact office"

***********************
* Sample restrictions
***********************

* Unrestricted file first; every restriction below applies only to ca-election.dta.
* -count- after each step logs the remaining sample size.
save ca-election-full, replace

count

* Drop individuals without an assigned first election
* (first elections after 2010 are not assigned: no four year window afterwards to examine)
drop if missing(first_election)
count

* Drop incumbents
drop if first_incumbent == 1
count

* Drop individuals who are (1) in office in their first race (appointed or elected) or
*                          (2) an officeholder according to their ballot designation
* A ballot designation containing "APPOINTED" also counts as being in office
replace first_inoffice = 1 if strpos(first_occ, "APPOINTED") > 0
drop if first_inoffice == 1 | first_occ_group == 32
count

* Drop individuals who are in races with only one candidate
drop if first_num_cand == 1
count

* Drop individuals with no office categorization
drop if officecat == .
count

* Drop names that can't be classified (either ambiguous gender, coded 999, or not in the names databases)
drop if sex == 999 | sex == .
count

save ca-election, replace

* Remove the occupation checkpoint file written at the start, then close the log
erase occ-temp.dta
log close

